**************************************************
*              Estimation Programs               *
*                 Adam M. Guren                  *
*                August 26, 2013                 *
*             Updated: May 8, 2016              *
*												 *
* Programs used in the microdata analysis.       *
*												 *
* These include:                                 *
* 1) semipar_visual: For semi-parametric graphs  *
*     this is basically a wrapper for binscatter *
* 2) semipar_table_poly: For creating a table    *
*      with a polynomial fit.                    *
* 3) semipar_table_poly_se: Same as above with   *
*     bootstrapped standard errors.              *
* 4) semipar_table_poly_toboot: Function         *
*     called by both table and table_se that is  *
*     bootstrappable.                            *
* 5) semipar_visual_iv: Semi-parametric IV graphs*
*     both first and second stage. Run as a      *
*     control function.                          *
* 6) semipar_visual_iv_poly: Same as before     *
*     with overlayed polynomial.                 *
* 7) semipar_visual_iv_poly_se: Same as before  *
*     with overlayed polynomial and pointwise    *
*     se bands.                                  *
* 8) semipar_iv_visual_topo: Polynomials for    *
*     multiple different times to sale.          *
* 9) semipar_iv_table_poly: For creating a      *
*      table  with a polynomial fit.             *
* 10) semipar_iv_table_poly_se: Same as above    *
*     with bootstrapped standard errors.         *
* 11) semipar_iv_table_poly_toboot:              *
*     Function called by both table and table_se *
*     that is bootstrappable.                    *
* 12) semipar_iv_visual_geo: Polynomials for     *
*       multiple different geographies           *
**************************************************


/* 1) semipar_visual
 Syntax:
    semipar visual y x
    accepts if and weights
    absorb(varlist) - creates categorical variables by groups of unqiue values of every combination of varlist and includes as fixed effects
	minfecell(integer) - drop fixed effect cells with less than this many people in them to prevent imprecisely estimated fixed effects from biasing things
    controls(varlist) - adds varlist as control variables
    nbins(interger) - number of bins
    title, xtitle, ytitle - for labeling graphs
    savegraph, graphtype - string for graph saving.  graphtype is the extension.  so savegraph(name) graphtype(wmf)
    savedata - saves the data from the binscatter as a csv and do file
	pctiletodrop(real) - drops the percentile on either end based on x variable after residualizing
	addymean - add in y mean
	addxmean - add in x mean
	log_y - y axis in logs (assuming that it is usually in levels) AFTER estimation
	level_x - x axis in levels (assuming that it is usually in logs) AFTER estimation
	fxsize fysize - for resizing graphs
	b_by(varlist) - Runs the controls by groups defined by the varlist. If blank, the controls are used as is.
*/    

cap program drop semipar_visual
program semipar_visual
	
	* Parse weights, if any
	_parsewt "fweight aweight pweight" `0' 
	local 0  "`s(newcmd)'" /* command minus weight statement */
	local wt "`s(weight)'"  /* contains [weight=exp] or nothing */
	
	syntax varlist(min=2 max=2) [if] [, absorb(varlist numeric) minfecell(integer 0) controls(varlist numeric) nbins(integer 20) title(string) xtitle(string) ytitle(string) savegraph(string) savedata(string) graphtype(string) pctiletodrop(real 0) addxmean addymean log_y level_x fxsize(real 100) fysize(real 100) b_by(varlist numeric)]
		
	* Mark sample (reflects the if conditions, and includes only nonmissing observations)
	qui {
		marksample touse

		* Parse varlist
		local y : word 1 of `varlist'
		local x : word 2 of `varlist'
		
		
		* Parse absorb to define the type of regression to be used
		if `"`absorb'"'!="" {
			cap drop toabsorb
			local regtype `"areg"'
			egen toabsorb = group(`absorb')
			local absorb `"absorb(toabsorb)"'

		}
		else {
			local regtype `"reg"'
		}
		
		* Get Weightvar
		local wtvar: word 2 of `wt'
		local wtvar = subinstr("`wtvar'","]","",1)
	}
	
	* For speed preserve and keep only what I need
    preserve
	keep if `touse'
	
	* Alternate controls if b_by is nonempty
	if "`b_by'" ~= "" {
		egen b_by_group = group(`b_by')
		qui levelsof b_by_group, local(levels)
		local controls2 = ""
		foreach var in `controls' {
			foreach level in `levels' {
				qui gen `var'_`level' = `var' if b_by_group == `level'
				qui replace `var'_`level' = 0 if b_by_group ~= `level'
				qui replace `var'_`level' = . if `var' == .
				local controls2 = "`controls2' `var'_`level'"
			}
		}
		di "`controls2'"
		local controls = "`controls2'"
		drop b_by_group
	}
	
	* Keep what need
	if "`regtype'" == "areg" {
		qui keep `x' `y' `controls' `wtvar' `touse' toabsorb
	}
	else {
		qui keep `x' `y' `controls' `wtvar' `touse'
	}
	
	* Drop missing controls
	foreach var of varlist `controls' {
		drop if `var' == .
	}
	
	count if `touse'
	
	// drop fixed effects cells with less observations than minfecell
	qui egen countfecell = count(toabsorb), by(toabsorb)
	drop if countfecell < `minfecell'
	
	
	* Residualize x to get nu
	tempvar x_sample y_sample tot_sample
	qui `regtype' `x' `controls' `wt'  if `touse', `absorb'		
	qui predict x_r if `touse', residuals
	qui gen byte `x_sample' = e(sample)
	if "`addxmean'" ~= "" | "`level_x'" ~= ""{
		qui summarize `x' `wt' if `touse', meanonly
		qui replace x_r=x_r+`r(mean)'
	}
	
	* Report sample size before dropping percentile
	count if `x_sample'
		
	* Drop percentiles from either end of x variable before making figure if included in command
	if `pctiletodrop' ~= 0 {
		local toppctiletodrop = 100 - `pctiletodrop'
		_pctile x_r, percentiles (`pctiletodrop' `toppctiletodrop')
		drop if x_r < `r(r1)' | x_r > `r(r2)'
	}
	count if `x_sample'
	
	* Drop entirely unique observations
	egen count_toabsorb = count(toabsorb), by(toabsorb)
	drop if count_toabsorb == 1
	count if `x_sample'
	
	* Create `nbins' bins and residualize y on dummies and bins
	fastxtile x_xtile =  x_r `wt' if `x_sample', nq(`nbins')
	qui `regtype' `y' i.x_xtile `wt' if `touse', `absorb'
	* Use predict to recover the values for each xtile.
	qui predict y_r if `touse'
	qui gen byte `y_sample' = e(sample)
	if "`addymean'" == "" {
		qui summarize `y' `wt' if `touse', meanonly
		qui replace y_r=y_r-`r(mean)'
	}	
	
	* Transform x and y into logs/levels as instructed
	if "`log_y'" ~= "" {
		replace y_r = log(y_r)
	}
	if "`level_x'" ~= "" {
		replace x_r = exp(x_r)
		if "`addxmean'" == "" {
			qui sum x_r `wt', meanonly
			replace x_r = x_r - `r(mean)'
		}
	}
	
	* Binscatter
	if "`savegraph'" == "" & "`savedata'" == "" {
		qui binscatter y_r x_r `wt' if `touse', linetype(none) nbins(`nbins') title("`title'") xti("`xtitle'") yti("`ytitle'") fxsize(`fxsize') fysize(`fysize') bgcolor(white) graphregion(color(white))
	}
	if "`savegraph'" == "" & "`savedata'" ~= "" { 
		qui binscatter y_r x_r `wt' if `touse', linetype(none) nbins(`nbins') title("`title'") xti("`xtitle'") yti("`ytitle'") savedata("`savedata'") fxsize(`fxsize') fysize(`fysize') bgcolor(white) graphregion(color(white)) replace
	}
	if "`savegraph'" ~= "" & "`savedata'" == "" { '
		qui binscatter y_r x_r `wt' if `touse', linetype(none) nbins(`nbins') title("`title'") xti("`xtitle'") yti("`ytitle'") savegraph("`savegraph'.`graphtype'") fxsize(`fxsize') fysize(`fysize') bgcolor(white) graphregion(color(white)) replace
	}
	if "`savegraph'" ~= "" & "`savedata'" ~= "" { 
		qui binscatter y_r x_r `wt' if `touse', linetype(none) nbins(`nbins') title("`title'") xti("`xtitle'") yti("`ytitle'") savegraph("`savegraph'.`graphtype'") savedata("`savedata'") fxsize(`fxsize') fysize(`fysize') bgcolor(white) graphregion(color(white)) replace
	}
	
	restore
end

/* 2) semipar_table_poly
 Syntax:
    semipar visual y x
    accepts if and weights
    absorb(varlist) - creates categorical variables by groups of unqiue values of every combination of varlist and includes as fixed effects
	minfecell(integer) - drop fixed effect cells with less than this many people in them to prevent imprecisely estimated fixed effects from biasing things
    controls(varlist) - adds varlist as control variables
	pctiletodrop(real) - drops the percentile on either end based on x variable after residualizing
	addymean - add in y mean
	addxmean - add in x mean
	level_x - x axis in levels (assuming that it is usually in logs) AFTER estimation
	poly_order - order of polynomial fit
	b_by(varlist) - Runs the controls by groups defined by the varlist. If blank, the controls are used as is.
*/   

cap program drop semipar_table_poly
program semipar_table_poly
	
	* Parse weights, if any
	_parsewt "fweight aweight pweight" `0' 
	local 0  "`s(newcmd)'" /* command minus weight statement */
	local wt "`s(weight)'"  /* contains [weight=exp] or nothing */
	
	syntax varlist(min=2 max=2) [if] [, absorb(varlist) minfecell(integer 0) controls(varlist numeric) poly_order(integer 3) pctiletodrop(real 0) addxmean addymean level_x b_by(varlist numeric)]
	
	* Mark sample (reflects the if conditions, and includes only nonmissing observations)
	qui marksample touse

	* Parse varlist
	local y : word 1 of `varlist'
	local x : word 2 of `varlist'
	
	qui {
		* Parse absorb to define the type of regression to be used
		if `"`absorb'"'!="" {
			cap drop toabsorb
			local regtype `"areg"'
			egen toabsorb = group(`absorb')
			local absorb `"absorb(toabsorb)"'
		}
		else {
			local regtype `"reg"'
		}
		
		* Get Weightvar
		local wtvar: word 2 of `wt'
		local wtvar = subinstr("`wtvar'","]","",1)
		
		preserve

		
		* Alternate controls if b_by is nonempty
		if "`b_by'" ~= "" {
			egen b_by_group = group(`b_by')
			qui levelsof b_by_group, local(levels)
			local controls2 = ""
			foreach var in `controls' {
				foreach level in `levels' {
					qui gen `var'_`level' = `var' if b_by_group == `level'
					qui replace `var'_`level' = 0 if b_by_group ~= `level'
					qui replace `var'_`level' = . if `var' == .
					local controls2 = "`controls2' `var'_`level'"
				}
			}
			di "`controls2'"
			local controls = "`controls2'"
			drop b_by_group
		}
	}
	
	* For speed preserve and keep only what I need
	if "qui `regtype'" == "areg" {
		qui keep `x' `y' `controls' `wtvar' `touse' toabsorb
	}
	else {
		qui keep `x' `y' `controls' `wtvar' `touse' toabsorb
	}
	
	foreach var of varlist `controls' {
		drop if `var' == .
	}
	
	// drop fixed effects cells with less observations than minfecell
	qui egen countfecell = count(toabsorb), by(toabsorb)
	drop if countfecell < `minfecell'
	
	// Call semipar_table_poly_toboot for esimtation
	semipar_table_poly_toboot `y' `x' `wt' if `touse', `absorb' controls(`controls') poly_order(`poly_order') pctiletodrop(`pctiletodrop') `addxmean' `addymean' `level_x'
	count if e(sample) == 1

	restore
end

/* 3) semipar_table_poly_se
 Syntax:
    semipar visual y x
    accepts if and weights
    absorb(varlist) - creates categorical variables by groups of unqiue values of every combination of varlist and includes as fixed effects
	minfecell(integer) - drop fixed effect cells with less than this many people in them to prevent imprecisely estimated fixed effects from biasing things
    controls(varlist) - adds varlist as control variables
	bootstrapreps - number of bootstrap reps for calculating se
	pctiletodrop(real) - drops the percentile on either end based on x variable after residualizing
	addymean - add in y mean
	addxmean - add in x mean
	level_x - x axis in levels (assuming that it is usually in logs) AFTER estimation
	poly_order - order of polynomial fit
	b_by(varlist) - Runs the controls by groups defined by the varlist. If blank, the controls are used as is.
*/   

cap program drop semipar_table_poly_se
program semipar_table_poly_se
	
	* Parse weights, if any
	_parsewt "fweight aweight pweight" `0' 
	local 0  "`s(newcmd)'" /* command minus weight statement */
	local wt "`s(weight)'"  /* contains [weight=exp] or nothing */
	
	syntax varlist(min=2 max=2) [if] [, absorb(varlist) minfecell(integer 0) controls(varlist numeric) bootstrapreps(integer 100) poly_order(integer 3) pctiletodrop(real 0) addxmean addymean level_x cluster(varlist) b_by(varlist numeric)]
	
	* Mark sample (reflects the if conditions, and includes only nonmissing observations)
	qui {
		marksample touse

		* Parse varlist
		local y : word 1 of `varlist'
		local x : word 2 of `varlist'
		
		
		* Parse absorb to define the type of regression to be used
		if `"`absorb'"'!="" {
			cap drop toabsorb
			local regtype `"areg"'
			egen toabsorb = group(`absorb')
			local absorb `"absorb(toabsorb)"'
		}
		else {
			local regtype `"reg"'
		}
		
		* Get Weightvar
		local wtvar: word 2 of `wt'
		local wtvar = subinstr("`wtvar'","]","",1)
		
		* For speed preserve and keep only what I need
		preserve
		
		* Alternate controls if b_by is nonempty
		if "`b_by'" ~= "" {
			egen b_by_group = group(`b_by')
			qui levelsof b_by_group, local(levels)
			local controls2 = ""
			foreach var in `controls' {
				foreach level in `levels' {
					qui gen `var'_`level' = `var' if b_by_group == `level'
					qui replace `var'_`level' = 0 if b_by_group ~= `level'
					qui replace `var'_`level' = . if `var' == .
					local controls2 = "`controls2' `var'_`level'"
				}
			}
			di "`controls2'"
			local controls = "`controls2'"
			drop b_by_group
		}
	}
	
	* Keep only needed variables
	if "`regtype'" == "areg" {
		qui keep `x' `y' `controls' `wtvar' `touse' toabsorb `cluster'
	}
	else {
		qui keep `x' `y' `controls' `wtvar' `touse' toabsorb `cluster'
	}
	
	foreach var of varlist `controls' {
		drop if `var' == .
	}
	
	// drop fixed effects cells with less observations than minfecell
	qui egen countfecell = count(toabsorb), by(toabsorb)
	drop if countfecell < `minfecell'
	
	* Cluster logic
	if "`cluster'" ~= "" {
		local cluster = "cluster(`cluster')"
	}
	
	* Now run bootstrapreps bootsraps of semipar_table_poly_forbootstrap.
	* Note: The "right" way to do this is to use the clusterid and group options of
	* the bootstrap command, which create a new identifier for each separate cluster
	* for the estimation command. This makes it so that when one cluster is sampled multiple
	* times, each copy of the ecluster is treated as a separate version for fixed effects.
	* However, all I do with the zip5 variable within zip3 clusters is demean (zip5 x date FE), and this is the 
	* same transformation regardless of whether I treat all of the versions of a cluster as the same ZIP5 or not.
	* Thus this extra step is not necessary, and I can leave out the clusterid and group options.
	bootstrap , reps(`bootstrapreps') seed(1000) force `cluster': semipar_table_poly_toboot `y' `x' `wt' if `touse', `absorb' controls(`controls') poly_order(`poly_order') pctiletodrop(`pctiletodrop') `addxmean' `addymean' `level_x'
	estat bootstrap, percentile

	restore
end

/* 4) semipar_table_poly_toboot
 Syntax:
    semipar visual y x
    accepts if and weights
    absorb(varlist) - creates categorical variables by groups of unqiue values of every combination of varlist and includes as fixed effects
    controls(varlist) - adds varlist as control variables
	pctiletodrop(real) - drops the percentile on either end based on x variable after residualizing
	addymean - add in y mean
	addxmean - add in x mean
	level_x - x axis in levels (assuming that it is usually in logs) AFTER estimation
	poly_order - order of polynomial fit
*/   

cap program drop semipar_table_poly_toboot
program semipar_table_poly_toboot, eclass
	
	* Parse weights, if any
	_parsewt "fweight aweight pweight" `0' 
	local 0  "`s(newcmd)'" /* command minus weight statement */
	local wt "`s(weight)'"  /* contains [weight=exp] or nothing */
	
	syntax varlist(min=2 max=2) [if] [, absorb(varlist) controls(varlist numeric) poly_order(integer 3) pctiletodrop(real 0) level_x addxmean addymean]
	
	* Mark sample (reflects the if conditions, and includes only nonmissing observations)
	marksample touse
	keep if `touse'

	* Parse varlist
	local y : word 1 of `varlist'
	local x : word 2 of `varlist'
	
	* Parse absorb to define the type of regression to be used
	if `"`absorb'"'!="" {
		local regtype `"areg"'
		local absorb `"absorb(toabsorb)"'
	}
	else {
		local regtype `"reg"'
	}
	

	* Residualize x to get eta
	tempvar x_sample y_sample 
	qui `regtype' `x' `controls' `wt'  if `touse', `absorb'		
	gen byte `x_sample' = e(sample)
	predict x_r if `touse', residuals	
	if "`addxmean'" ~= "" {
		qui summarize `x' `wt' if `touse', meanonly
		qui replace x_r=x_r+`r(mean)'
	}
	
	* Transform eta into levels if instructed
	if "`level_x'" ~= "" {
		replace x_r = exp(x_r)
	}
	
	* Drop percentiles from either end of x variable before making figure if included in command
	if `pctiletodrop' ~= 0 {
		local toppctiletodrop = 100 - `pctiletodrop'
		_pctile x_r, percentiles (`pctiletodrop' `toppctiletodrop')
		gen pctiletodrop =  x_r < `r(r1)' | x_r > `r(r2)'
	}
	else {
		gen pctiletodrop = 0
	}
		
	if `poly_order' > 1 {
		forvalues i = 2/`poly_order' {
				qui gen x_r_`i' = x_r^`i'
		}
	}	
	* Run regression
	di "here at reg"
	`regtype' `y' x_r* `wt' if pctiletodrop == 0, `absorb'
	gen byte `y_sample' = e(sample)
	tempname bb
	matrix `bb' = e(b)
	
	// Ereturn logic.
	mark tot_sample if `x_sample'
	ereturn post `bb', esample(tot_sample)
	ereturn local cmd ="bootstrap"
	drop x_r* pctiletodrop
	// For info on why this is the right way to do this, see
	// http://www.stata.com/support/faqs/statistics/bootstrapping-vectors/
end


/* 5) semipar_iv_visual
 Syntax:
    semipar visual y x z
    accepts if and weights
    absorb(varlist) - creates categorical variables by groups of unqiue values of every combination of varlist and includes as fixed effects
	minfecell(integer) - drop fixed effect cells with less than this many people in them to prevent imprecisely estimated fixed effects from biasing things
    controls(varlist) - adds varlist as control variables
    nbins(interger) - number of bins
	fs_order(interger) - order of pollynomial for first stage and control functions 
	winsorfs: Winsorize the first stage plot.
    title, xtitle, ytitle - for labeling graphs
    savegraph, graphtype - string for graph saving.  graphtype is the extension.  so savegraph(name) graphtype(wmf)
    savedata - saves the data from the binscatter
	pctiletodrop(real) - drops the percentile on either end based on x variable after residualizing
	addymean - add in y mean
	addxmean - add in x mean
	log_y - y axis in logs (assuming that it is usually in levels) AFTER estimation
	level_x - x axis in levels (assuming that it is usually in logs) AFTER estimation
	b_by(varlist) - Runs the controls by groups defined by the varlist. If blank, the controls are used as is.
	polyfs - add a polynomial overlay to the first stage
*/    

cap program drop semipar_iv_visual
program semipar_iv_visual
	
	* Parse weights, if any
	_parsewt "fweight aweight pweight" `0' 
	local 0  "`s(newcmd)'" /* command minus weight statement */
	local wt "`s(weight)'"  /* contains [weight=exp] or nothing */
	
	syntax varlist(min=3 max=3) [if] [, absorb(varlist numeric) minfecell(integer 0) controls(varlist numeric) nbins(integer 20) winsorfs fs_order(integer 2) title(string) xtitle(string) ytitle(string) savegraph(string) savedata(string) graphtype(string) pctiletodrop(real 0) polyfs addxmean addymean log_y level_x b_by(varlist numeric)]
	
	* Mark sample (reflects the if conditions, and includes only nonmissing observations)
	marksample touse

	* Parse varlist
	local y : word 1 of `varlist'
	local x : word 2 of `varlist'
	local z : word 3 of `varlist'
	
	
	* Parse absorb to define the type of regression to be used
	if `"`absorb'"'!="" {
		di "absorb: `absorb'"
		cap drop toabsorb
		local regtype `"areg"'
		qui egen toabsorb = group(`absorb')
		local absorb `"absorb(toabsorb)"'
	}
	else {
		local regtype `"reg"'
	}
	
	* Get Weightvar
	local wtvar: word 2 of `wt'
	local wtvar = subinstr("`wtvar'","]","",1)
	
	* For speed preserve and keep only what I need
	preserve
	
	* Alternate controls if b_by is nonempty
	if "`b_by'" ~= "" {
		egen b_by_group = group(`b_by')
		qui levelsof b_by_group, local(levels)
		local controls2 = ""
		foreach var in `controls' {
			foreach level in `levels' {
				qui gen `var'_`level' = `var' if b_by_group == `level'
				qui replace `var'_`level' = 0 if b_by_group ~= `level'
				qui replace `var'_`level' = . if `var' == .
				local controls2 = "`controls2' `var'_`level'"
			}
		}
		di "`controls2'"
		local controls = "`controls2'"
	}
	
	* Keep only what is necessary
		if "`regtype'" == "areg" {
			qui keep `x' `y' `z' `controls' `wtvar' `touse' toabsorb 

		}
		else {
			qui keep `x' `y' `z' `controls' `wtvar' `touse' toabsorb 
		}
		
		qui keep if `touse'
		
		foreach var of varlist `controls' {
			drop if `var' == .
		}
		
		count if `touse'
		
		// drop fixed effects cells with less observations than minfecell
		qui egen countfecell = count(toabsorb), by(toabsorb)
		drop if countfecell < `minfecell'

		* First stage
		local z_list = ""
		if `fs_order' > 1 {
			forvalues i = 2/`fs_order' {
				qui gen z_`i' = `z'^`i'
				local z_list = "`z_list' z_`i'"
			}
			`regtype' `x' `z' z_* `controls' `wt' if `touse', `absorb'
			test `z' `z_list'
			partpred fz, for(`z' `z_list')
			qui sum fz `wt' if `touse', meanonly
			replace fz = fz-`r(mean)'
		}
		else {
			qui `regtype' `x' `z' `controls' `wt' if `touse', `absorb'
			test `z'
			partpred fz, for(`z')
			qui sum fz `wt' if `touse', meanonly
			qui replace fz = fz-`r(mean)'
		}
	qui{
		tempvar x_sample y_sample tot_sample
		gen byte `x_sample' = e(sample)
		if "`addxmean'" ~= "" { // add x mean if requested
			qui summarize `x' `wt' if `touse', meanonly
			qui replace fz=fz+`r(mean)'
		}
		
		if `pctiletodrop' ~= 0 & "`winsorfs'" ~= "" { // winsorizing code
			local toppctiletodrop = 100 - `pctiletodrop'
			_pctile `x', percentiles (`pctiletodrop' `toppctiletodrop')
			gen towins = `x' < `r(r1)' | `x' > `r(r2)'
			local winsorfs = "& towins == 0"
			* May need to recenter after winsor.
			qui sum fz `wt' if `touse' `winsorfs', meanonly
			qui replace fz = fz-`r(mean)'
		}	
		
		// demean x
		qui sum `x' `wt', meanonly
		gen x_demeaned = `x' - `r(mean)'
		
		tempfile temporary_file
		save "`temporary_file'", replace
		
		// Binscatter of first stage
		binscatter x_demeaned `z' `wt' if `touse' `winsorfs', nbins(25) title(a. First Stage) xtitle(Instrument) ytitle("Log Initial List Price Relative to Mean") controls(`controls') absorb(toabsorb) linetype(none) savedata("temp") bgcolor(white) graphregion(fcolor(white)) 
		
		// get binscatter data
		clear
		do temp.do
		cap erase temp.do
		cap erase temp.csv	
		
		ren x_demeaned y_binscatter
		ren `z' x_binscatter
		append using "`temporary_file'"
		
		sort `z'
		if "`polyfs'" == "polyfs" { // if polynomial first stage, add in polynomial
			qui sum x_binscatter
			twoway (scatter y_binscatter x_binscatter) (line fz `z' if mod(_n,1000) == 0 & `z' >= `r(min)' & `z' <= `r(max)', lcolor(gs10)), legend(off) title(a. First Stage) xtitle(Instrument) ytitle("Log Initial List Price Relative to Mean") graphregion(fcolor(white)) bgcolor(white)
		}
		// save first stage graph
		if "`savegraph'" ~= "" {
			graph export "`savegraph'_firststage.`graphtype'", replace
			graph save "`savegraph'_firststage.gph", replace
		}
		
		
		cap drop winsorfs
		
	
		* Drop percentiles from either end of x variable before making figure if included in command
		if `pctiletodrop' ~= 0 {
			local toppctiletodrop = 100 - `pctiletodrop'
			_pctile fz `wt', percentiles (`pctiletodrop' `toppctiletodrop')
			drop if fz < `r(r1)' | fz > `r(r2)'
		}
		
		count if `x_sample' == 1
		
		* Drop entirely unique observations
		egen count_toabsorb = count(toabsorb), by(toabsorb)
		drop if count_toabsorb == 1
		
		count if `x_sample' == 1
		
		* Create `nbins' bins and residualize y on dummies and bins
      fastxtile x_xtile = fz `wt' if `x_sample', nq(`nbins')
      if "`b_by'" ~= "" {
       	  egen b_by_group = group(`b_by')
           qui gen y_r = .
           qui gen `y_sample' = 
           qui levelsof b_by_group, local(levels)
		     local controls2 = ""
           foreach level in `levels' {
		          qui `regtype' `y' i.x_xtile `wt' if `touse' & b_by_group == `level', `absorb'
                qui predict y_r_`level' if `touse' & b_by_group == `level'
           	    qui gen byte y_sample_`level'' = e(sample)
        			 qui summarize `y' `wt' if `touse' & b_by_group == `level', meanonly
                qui replace y_r =y_r_`level'-`r(mean)' if b_by_group == `level'
                replace `y_sample' = `y_sample' + y_sample_`level'
                drop y_sample_`level' y_r_`level'
           }
      }
      else {
		* Use predict to recover the values for each xtile.
		qui `regtype' `y' i.x_xtile `wt' if `touse', `absorb'
		qui predict y_r if `touse'
		qui gen byte `y_sample' = e(sample)
		if "`addymean'" == "" {
			qui summarize `y' `wt' if `touse', meanonly
			qui replace y_r=y_r-`r(mean)'
		}
		}
		* Transform x and y into logs/levels as instructed
		if "`log_y'" ~= "" {
			replace y_r = log(y_r)
		}
		if "`level_x'" ~= "" {
			replace fz = exp(fz)
		}
		
	}
	
	qui gen byte `tot_sample' = `y_sample' & `x_sample'
	count if `tot_sample' == 1	
		
* Binscatter
if "`b_by'" ~= "" {
	if "`savegraph'" == "" & "`savedata'" == "" {
		qui binscatter y_r fz `wt' if `touse', linetype(none) nbins(`nbins') title("`title'") xti("`xtitle'") yti("`ytitle'") bgcolor(white) by(b_by_group)
	}
	if "`savegraph'" == "" & "`savedata'" ~= "" { 
		qui binscatter y_r fz `wt' if `touse', linetype(none) nbins(`nbins')  title("`title'") xti("`xtitle'") yti("`ytitle'") savedata("`savedata'") replace bgcolor(white) by(b_by_group)
	}
	if "`savegraph'" ~= "" & "`savedata'" == "" { '
		qui binscatter y_r fz `wt' if `touse', linetype(none) nbins(`nbins') title("`title'") xti("`xtitle'") yti("`ytitle'") savegraph("`savegraph'.`graphtype'") replace bgcolor(white) by(b_by_group)
	}
	if "`savegraph'" ~= "" & "`savedata'" ~= "" { 
		qui binscatter y_r fz `wt' if `touse', linetype(none) nbins(`nbins') title("`title'") xti("`xtitle'") yti("`ytitle'") savegraph("`savegraph'.`graphtype'") savedata("`savedata'") replace bgcolor(white) by(b_by_group)
}
else {
	if "`savegraph'" == "" & "`savedata'" == "" {
		qui binscatter y_r fz `wt' if `touse', linetype(none) nbins(`nbins') title("`title'") xti("`xtitle'") yti("`ytitle'") bgcolor(white)
	}
	if "`savegraph'" == "" & "`savedata'" ~= "" { 
		qui binscatter y_r fz `wt' if `touse', linetype(none) nbins(`nbins')  title("`title'") xti("`xtitle'") yti("`ytitle'") savedata("`savedata'") replace bgcolor(white)
	}
	if "`savegraph'" ~= "" & "`savedata'" == "" { '
		qui binscatter y_r fz `wt' if `touse', linetype(none) nbins(`nbins') title("`title'") xti("`xtitle'") yti("`ytitle'") savegraph("`savegraph'.`graphtype'") replace bgcolor(white)
	}
	if "`savegraph'" ~= "" & "`savedata'" ~= "" { 
		qui binscatter y_r fz `wt' if `touse', linetype(none) nbins(`nbins') title("`title'") xti("`xtitle'") yti("`ytitle'") savegraph("`savegraph'.`graphtype'") savedata("`savedata'") replace bgcolor(white)
}
}
	restore
end

/* 6) semipar_iv_visual_poly
SAME AS ABOVE EXCEPT OVERLAYING A LOCAL POLYNOMIAL FIT, NO FIRST STAGE PLOT
 Syntax:
    semipar visual y x z
    accepts if and weights
    absorb(varlist) - creates categorical variables by groups of unqiue values of every combination of varlist and includes as fixed effects
	minfecell(integer) - drop fixed effect cells with less than this many people in them to prevent imprecisely estimated fixed effects from biasing things
    controls(varlist) - adds varlist as control variables
    nbins(interger) - number of bins
	winsorfs: Winsorize the first stage plot.  Does not do anything here becasue first stage plot is not displayed, but kept for consistent syntax.
	fs_order(interger) - order of pollynomial for first stage and control functions 
    title, xtitle, ytitle - for labeling graphs
    savegraph, graphtype - string for graph saving.  graphtype is the extension.  so savegraph(name) graphtype(wmf)
    savedata - saves the data from the binscatter
	pctiletodrop(real) - drops the percentile on either end based on x variable after residualizing
	addymean - add in y mean
	addxmean - add in x mean
	log_y - y axis in logs (assuming that it is usually in levels) AFTER estimation
	level_x - x axis in levels (assuming that it is usually in logs) AFTER estimation
	poly_order - order of polynomial fit
	b_by(varlist) - Runs the controls by groups defined by the varlist. If blank, the controls are used as is.

*/    

cap program drop semipar_iv_visual_poly
program semipar_iv_visual_poly
* Parse weights, if any
	_parsewt "fweight aweight pweight" `0' 
	local 0  "`s(newcmd)'" /* command minus weight statement */
	local wt "`s(weight)'"  /* contains [weight=exp] or nothing */
	
	syntax varlist(min=3 max=3) [if] [, absorb(varlist numeric) minfecell(integer 0) controls(varlist numeric) nbins(integer 20) winsorfs poly_order(integer 3) fs_order(integer 3) title(string) xtitle(string) ytitle(string) savegraph(string) savedata(string) graphtype(string) pctiletodrop(real 0) addxmean addymean log_y level_x b_by(varlist numeric)]
	
	* Mark sample (reflects the if conditions, and includes only nonmissing observations)
	marksample touse

	* Parse varlist
	local y : word 1 of `varlist'
	local x : word 2 of `varlist'
	local z : word 3 of `varlist'
	
	
	* Parse absorb to define the type of regression to be used
	if `"`absorb'"'!="" {
		di "absorb: `absorb'"
		cap drop toabsorb
		local regtype `"areg"'
		qui egen toabsorb = group(`absorb')
		local absorb `"absorb(toabsorb)"'
	}
	else {
		local regtype `"reg"'
	}
	
	* Get Weightvar
	local wtvar: word 2 of `wt'
	local wtvar = subinstr("`wtvar'","]","",1)
	
	* For speed preserve and keep only what I need
	preserve
	
	* Alternate controls if b_by is nonempty
	if "`b_by'" ~= "" {
		egen b_by_group = group(`b_by')
		qui levelsof b_by_group, local(levels)
		local controls2 = ""
		foreach var in `controls' {
			foreach level in `levels' {
				qui gen `var'_`level' = `var' if b_by_group == `level'
				qui replace `var'_`level' = 0 if b_by_group ~= `level'
				qui replace `var'_`level' = . if `var' == .
				local controls2 = "`controls2' `var'_`level'"
			}
		}
		di "`controls2'"
		local controls = "`controls2'"
		drop b_by_group
	}
	
	* Keep only what is necessary
		if "`regtype'" == "areg" {
			qui keep `x' `y' `z' `controls' `wtvar' `touse' toabsorb
		}
		else {
			qui keep `x' `y' `z' `controls' `wtvar' `touse' toabsorb
		}
		
		qui keep if `touse'
		
		foreach var of varlist `controls' {
			drop if `var' == .
		}
		
		count if `touse'
		// drop fixed effects cells with less observations than minfecell
		qui egen countfecell = count(toabsorb), by(toabsorb)
		drop if countfecell < `minfecell'

		* First stage
		local z_list = ""
		if `fs_order' > 1 {
			forvalues i = 2/`fs_order' {
				qui gen z_`i' = `z'^`i'
				local z_list = "`z_list' z_`i'"
			}
			`regtype' `x' `z' z_* `controls' `wt' if `touse', `absorb'
			test `z' `z_list'
			partpred fz, for(`z' `z_list')
			qui sum fz `wt', meanonly
			replace fz = fz-`r(mean)'
		}
		else {
			qui `regtype' `x' `z' `controls' `wt' if `touse', `absorb'
			test `z'
			partpred fz, for(`z')
			qui sum fz `wt', meanonly
			qui replace fz = fz-`r(mean)'
		}
	qui{
		tempvar x_sample y_sample tot_sample
		gen byte `x_sample' = e(sample)
		if "`addxmean'" ~= "" {
			qui summarize `x' `wt' if `touse', meanonly
			qui replace fz=fz+`r(mean)'
		}
		
		if `pctiletodrop' ~= 0 & "`winsorfs'" ~= "" {
			local toppctiletodrop = 100 - `pctiletodrop'
			_pctile `x', percentiles (`pctiletodrop' `toppctiletodrop')
			gen towins = `x' < `r(r1)' | `x' > `r(r2)'
			local winsorfs = "& towins == 0"
		}	
		
		// Note First stage binscatters are commented out here. For a first_stage binscatter use version without polynomial.
		if "`savegraph'" ~= "" {
			*binscatter `x' `z' `wt' if `touse' `winsorfs', nbins(25) title(First Stage) xtitle(Instrument) ytitle("Log Initial List Price Relative to Mean") controls(`controls') absorb(toabsorb) linetype(none) savegraph("`savegraph'_firststage.`graphtype'")
		}
		else {
			*binscatter `x' `z' `wt' if `touse' `winsorfs', nbins(25) title(First Stage) xtitle(Instrument) ytitle("Log Initial List Price Relative to Mean") controls(`controls') absorb(toabsorb) linetype(none)
		}
		cap drop winsorfs
		
	
		* Drop percentiles from either end of x variable before making figure if included in command
		if `pctiletodrop' ~= 0 {
			local toppctiletodrop = 100 - `pctiletodrop'
			_pctile fz `wt', percentiles (`pctiletodrop' `toppctiletodrop')
			drop if fz < `r(r1)' | fz > `r(r2)'
		}
		
		di "here"
		count if `x_sample' == 1
		
		* Drop entirely unique observations
		egen count_toabsorb = count(toabsorb), by(toabsorb)
		drop if count_toabsorb == 1
		
		count if `x_sample' == 1
		
		* Create `nbins' bins and residualize y on dummies and bins
		fastxtile x_xtile = fz `wt' if `x_sample', nq(`nbins')
		qui `regtype' `y' i.x_xtile `wt' if `touse', `absorb'
		* Use predict to recover the values for each xtile.
		qui predict y_r if `touse'
		qui gen byte `y_sample' = e(sample)
		if "`addymean'" == "" {
			qui summarize `y' `wt' if `touse', meanonly
			qui replace y_r=y_r-`r(mean)'
		}
		
		* Transform x and y into logs/levels as instructed
		if "`log_y'" ~= "" {
			replace y_r = log(y_r)
		}
		if "`level_x'" ~= "" {
			replace fz = exp(fz)
		}
		
	}
	
	qui gen byte `tot_sample' = `y_sample' & `x_sample'
	count if `tot_sample' == 1	
	
	// create the polynomial in f(z)
	if `poly_order' > 1 {
		forvalues i = 2/`poly_order' {
				qui gen fz_`i' = fz^`i'
		}
	}

	// Run the regression and save it in a polynomial file
	`regtype' `y' fz* `wt', `absorb'
	predict splinepredicted
	tempfile splinepredicted
	gen xspline = fz
	if "`log_y'" ~= "" {
			replace splinepredicted = log(splinepredicted)
		}
		if "`level_x'" ~= "" {
			replace xspline= exp(xspline)
		}
	save "`splinepredicted'" // file with the prediction.
	
	* Binscatter
	if "`savedata'" == "" {
		qui binscatter y_r fz `wt' if `touse', linetype(none) nbins(`nbins') title("`title'") xti("`xtitle'") yti("`ytitle'") savedata("temp") replace bgcolor(white)
	}
	else { 
		qui binscatter y_r fz `wt' if `touse', linetype(none) nbins(`nbins')  title("`title'") xti("`xtitle'") yti("`ytitle'") savedata("`savedata'") replace bgcolor(white)
	}
	
	// Load the binscattered data
	if "`savedata'" == "" {
		clear
		do temp.do
		erase temp.do
		erase temp.csv
	}
	else{
		clear
		do "`savedata'.do"
	}
	
	// Append with polynomial
	ren fz x_binscatter
	ren y_r y_binscatter
	append using "`splinepredicted'"
	
	// Created binscatter with overlaid polynomial in "splinepredicted"
	qui sum x_binscatter
	keep if (xspline > `r(min)' & xspline < `r(max)') | xspline == .
	sort xspline
	// The "if mod(_n,1000)" is so that we do not use all the x points and create a file that is too large with too many data points instead use every 1000th data point.
	twoway (scatter y_binscatter x_binscatter) (line splinepredicted xspline if mod(_n,1000) == 0, lcolor(gs10)), legend(off) title("`title'") xti("`xtitle'") yti("`ytitle'") graphregion(fcolor(white)) bgcolor(white)

	if "`savegraph'" ~= "" {
		graph export "`savegraph'.`graphtype'", replace
	}
	
	restore
end

/* 7) semipar_iv_visual_poly_se
SAME AS ABOVE EXCEPT OVERLAYING A LOCAL POLYNOMIAL FIT, NO FIRST STAGE PLOT
ALSO INCLUDES 95% CONFIDENCE INTERVALS FROM bootstrap - CLUSTERING ON CLUSTER
 Syntax:
    semipar visual y x z
    accepts if and weights
    absorb(varlist) - creates categorical variables by groups of unqiue values of every combination of varlist and includes as fixed effects
	minfecell(integer) - drop fixed effect cells with less than this many people in them to prevent imprecisely estimated fixed effects from biasing things
    controls(varlist) - adds varlist as control variables
    nbins(interger) - number of bins
	winsorfs: Winsorize the first stage plot.  Does not do anything here becasue first stage plot is not displayed, but kept for consistent syntax.
	fs_order(interger) - order of pollynomial for first stage and control functions 
    title, xtitle, ytitle - for labeling graphs
    savegraph, graphtype - string for graph saving.  graphtype is the extension.  so savegraph(name) graphtype(wmf)
    savedata - saves the data from the binscatter
	pctiletodrop(real) - drops the percentile on either end based on x variable after residualizing
	addymean - add in y mean
	addxmean - add in x mean
	log_y - y axis in logs (assuming that it is usually in levels) AFTER estimation
	level_x - x axis in levels (assuming that it is usually in logs) AFTER estimation
	poly_order - order of polynomial fit
	b_by(varlist) - Runs the controls by groups defined by the varlist. If blank, the controls are used as is.

*/    

cap program drop semipar_iv_visual_poly_se
program semipar_iv_visual_poly_se
* Parse weights, if any
	_parsewt "fweight aweight pweight" `0' 
	local 0  "`s(newcmd)'" /* command minus weight statement */
	local wt "`s(weight)'"  /* contains [weight=exp] or nothing */
	
	syntax varlist(min=3 max=3) [if] [, absorb(varlist numeric) minfecell(integer 0) controls(varlist numeric) nbins(integer 20) winsorfs poly_order(integer 3) fs_order(integer 3) title(string) xtitle(string) ytitle(string) savegraph(string) savedata(string) graphtype(string) pctiletodrop(real 0) addxmean addymean log_y level_x cluster(varlist) bootstrapreps(integer 100) b_by(varlist numeric)]
	
	* Mark sample (reflects the if conditions, and includes only nonmissing observations)
	marksample touse

	* Parse varlist
	local y : word 1 of `varlist'
	local x : word 2 of `varlist'
	local z : word 3 of `varlist'
	
	
	* Parse absorb to define the type of regression to be used
	if `"`absorb'"'!="" {
		di "absorb: `absorb'"
		cap drop toabsorb
		local regtype `"areg"'
		qui egen toabsorb = group(`absorb')
		local absorb `"absorb(toabsorb)"'
	}
	else {
		local regtype `"reg"'
	}
	
	* Get Weightvar
	local wtvar: word 2 of `wt'
	local wtvar = subinstr("`wtvar'","]","",1)
	
	* For speed preserve and keep only what I need
	preserve
	
	* Alternate controls if b_by is nonempty
	if "`b_by'" ~= "" {
		egen b_by_group = group(`b_by')
		qui levelsof b_by_group, local(levels)
		local controls2 = ""
		foreach var in `controls' {
			foreach level in `levels' {
				qui gen `var'_`level' = `var' if b_by_group == `level'
				qui replace `var'_`level' = 0 if b_by_group ~= `level'
				qui replace `var'_`level' = . if `var' == .
				local controls2 = "`controls2' `var'_`level'"
			}
		}
		di "`controls2'"
		local controls = "`controls2'"
		drop b_by_group
	}
	
	* Keep only what is necessary
		if "`regtype'" == "areg" {
			qui keep `x' `y' `z' `controls' `wtvar' `touse' toabsorb `cluster'
		}
		else {
			qui keep `x' `y' `z' `controls' `wtvar' `touse' toabsorb `cluster'
		}
		
		qui keep if `touse'
		
		foreach var of varlist `controls' {
			drop if `var' == .
		}
		
		* Cluster logic
		if "`cluster'" ~= "" {
			local cluster = "cluster(`cluster')"
		}
		
		count if `touse'
		// drop fixed effects cells with less observations than minfecell
		qui egen countfecell = count(toabsorb), by(toabsorb)
		drop if countfecell < `minfecell'

		* First stage
		local z_list = ""
		if `fs_order' > 1 {
			forvalues i = 2/`fs_order' {
				qui gen z_`i' = `z'^`i'
				local z_list = "`z_list' z_`i'"
			}
			`regtype' `x' `z' z_* `controls' `wt' if `touse', `absorb'
			test `z' `z_list'
			partpred fz, for(`z' `z_list')
			qui sum fz `wt', meanonly
			replace fz = fz-`r(mean)'
		}
		else {
			qui `regtype' `x' `z' `controls' `wt' if `touse', `absorb'
			test `z'
			partpred fz, for(`z')
			qui sum fz `wt', meanonly
			qui replace fz = fz-`r(mean)'
		}
	qui{
		tempvar x_sample y_sample tot_sample
		gen byte `x_sample' = e(sample)
		if "`addxmean'" ~= "" {
			qui summarize `x' `wt' if `touse', meanonly
			qui replace fz=fz+`r(mean)'
		}
		
		if `pctiletodrop' ~= 0 & "`winsorfs'" ~= "" {
			local toppctiletodrop = 100 - `pctiletodrop'
			_pctile `x', percentiles (`pctiletodrop' `toppctiletodrop')
			gen towins = `x' < `r(r1)' | `x' > `r(r2)'
			local winsorfs = "& towins == 0"
		}	
		
		// Note First stage binscatters are commented out here. For a first_stage binscatter use version without polynomial.	
		if "`savegraph'" ~= "" {
			*binscatter `x' `z' `wt' if `touse' `winsorfs', nbins(25) title(a. First Stage) xtitle(Instrument) ytitle("Log Initial List Price Relative to Mean") controls(`controls') absorb(toabsorb) linetype(none) savegraph("`savegraph'_firststage.`graphtype'") bgcolor(white) graphregion(fcolor(white))
		}
		else {
			*binscatter `x' `z' `wt' if `touse' `winsorfs', nbins(25) title(a. First Stage) xtitle(Instrument) ytitle("Log Initial List Price Relative to Mean") controls(`controls') absorb(toabsorb) linetype(none) bgcolor(white) graphregion(fcolor(white))
		}
		cap drop winsorfs
		
	
		* Drop percentiles from either end of x variable before making figure if included in command
		if `pctiletodrop' ~= 0 {
			local toppctiletodrop = 100 - `pctiletodrop'
			_pctile fz `wt', percentiles (`pctiletodrop' `toppctiletodrop')
			drop if fz < `r(r1)' | fz > `r(r2)'
		}
		
		count if `x_sample' == 1
		
		* Drop entirely unique observations
		egen count_toabsorb = count(toabsorb), by(toabsorb)
		drop if count_toabsorb == 1
		
		count if `x_sample' == 1
		
		* Create `nbins' bins and residualize y on dummies and bins
		fastxtile x_xtile = fz `wt' if `x_sample', nq(`nbins')
		qui `regtype' `y' i.x_xtile `wt' if `touse', `absorb'
		* Use predict to recover the values for each xtile.
		qui predict y_r if `touse'
		qui gen byte `y_sample' = e(sample)
		if "`addymean'" == "" {
			qui summarize `y' `wt' if `touse', meanonly
			qui replace y_r=y_r-`r(mean)'
		}
		
		* Transform x and y into logs/levels as instructed
		if "`log_y'" ~= "" {
			replace y_r = log(y_r)
		}
		if "`level_x'" ~= "" {
			replace fz = exp(fz)
		}
		
	}
	
	qui gen byte `tot_sample' = `y_sample' & `x_sample'
	count if `tot_sample' == 1	
	
	// create the polynomial in f(z)
	if `poly_order' > 1 {
		forvalues i = 2/`poly_order' {
				qui gen fz_`i' = fz^`i'
		}
	}

	* Run the regression in the bootstrapped sample for the standard errors
	tempfile bootstraps
	* Note: The "right" way to do this is to use the clusterid and group options of
	* the bootstrap command, which create a new identifier for each separate cluster
	* for the estimation command. This makes it so that when one cluster is sampled multiple
	* times, each copy of the ecluster is treated as a separate version for fixed effects.
	* However, all I do with the zip5 variable within zip3 clusters is demean (zip5 x date FE), and this is the 
	* same transformation regardless of whether I treat all of the versions of a cluster as the same ZIP5 or not.
	* Thus this extra step is not necessary, and I can leave out the clusterid and group options.
	bootstrap, reps(`bootstrapreps') seed(1000) force `cluster' saving("`bootstraps'"): areg_wrapper `y' fz*, `absorb' wtvar(`wtvar')
	estat bootstrap, percentile

	// Run the regression and save it in a polynomial file
	`regtype' `y' fz* `wt', `absorb'
	predict splinepredicted
	tempfile splinepredicted
	gen xspline = fz
	if "`log_y'" ~= "" {
			replace splinepredicted = log(splinepredicted)
		}
		if "`level_x'" ~= "" {
			*replace xspline= exp(xspline)
		}
	save "`splinepredicted'", replace // file with the prediction.
	
	* Now create SE bands from bootstrap
	keep xspline splinepredicted
	keep if mod(_n,1000) == 0 // create the standard error bands only for every 1000th x varialbe so the file sizes are not too big.
	gen num = _n
	cross using "`bootstraps'"
	if `poly_order' == 2 {	
		gen spline_predicted_bootstrap = _b_cons + xspline*_b_fz + (xspline^2)*_b_fz_2
	}
	if `poly_order' == 3 {	
		gen spline_predicted_bootstrap = _b_cons + xspline*_b_fz + (xspline^2)*_b_fz_2 + (xspline^3) * _b_fz_3
	}
	if `poly_order' == 4 {	
		gen spline_predicted_bootstrap = _b_cons + xspline*_b_fz + (xspline^2)*_b_fz_2 + (xspline^3) * _b_fz_3 + (xspline^4) * _b_fz_4
	}
	if `poly_order' == 5 {	
		gen spline_predicted_bootstrap = _b_cons + xspline*_b_fz + (xspline^2)*_b_fz_2 + (xspline^3) * _b_fz_3 + (xspline^4) * _b_fz_4 + (xspline^5) * _b_fz_5
	}
	sort num spline_predicted_bootstrap
	by num: gen percentile = _n / `bootstrapreps'
	
	// Create upper and lower
	gen lower = percentile <= .025
	gen upper = percentile > .975
	
	// Shave off edges if thigns go too high or too low (doesn't happen)
	egen maxlower = max(percentile) if lower == 1
	egen minupper = min(percentile) if upper == 1
	keep if percentile == maxlower | percentile == minupper
	replace percentile = 25 if percentile == maxlower
	replace percentile = 975 if percentile == minupper
	drop lower upper minupper maxlower
	
	// Reshape things so can be merged int.
	drop _b* 
	reshape wide spline_predicted_bootstrap, i(num splinepredicted xspline) j(percentile)
	tempfile splinepredicted2
	save "`splinepredicted2'", replace // Save this in splinepredicted2, which will give us the standard errors.
	
	use "`splinepredicted'", clear

	* Binscatter
	if "`savedata'" == "" {
		qui binscatter y_r fz `wt' if `touse', linetype(none) nbins(`nbins') title("`title'") xti("`xtitle'") yti("`ytitle'") savedata("temp") replace bgcolor(white) graphregion(fcolor(white))
	}
	else { 
		qui binscatter y_r fz `wt' if `touse', linetype(none) nbins(`nbins')  title("`title'") xti("`xtitle'") yti("`ytitle'") savedata("`savedata'") replace bgcolor(white) graphregion(fcolor(white))
	}
	
	// Load the binscattered data
	if "`savedata'" == "" {
		clear
		do temp.do
		erase temp.do
		erase temp.csv
	}
	else{
		clear
		do "`savedata'.do"
	}
	
	// Append with polynomial and standard error bands.
	ren fz x_binscatter
	ren y_r y_binscatter
	append using "`splinepredicted2'"
		
	qui sum x_binscatter
	keep if (xspline > `r(min)' & xspline < `r(max)') | xspline == .
	
	// Some things to make sure the standard errors don't screw up the axes.
	qui sum spline_predicted_bootstrap25
	local ymin = floor(`r(min)'*1000)/1000
	local ylabelmin = floor(`r(min)'*50)/50
	qui sum spline_predicted_bootstrap975
	local ymax = ceil(`r(max)'*1000)/1000
	local ylabelmax = ceil(`r(max)'*50)/50
	sort xspline
	// Create teh combined figure
	twoway (scatter y_binscatter x_binscatter) (line splinepredicted xspline, lcolor(gs10))  (line spline_predicted_bootstrap25 xspline, lcolor(gs13))  (line spline_predicted_bootstrap975 xspline, lcolor(gs13)), legend(off) title("`title'") xti("`xtitle'") yti("`ytitle'") yscale(range(`ymin' `ymax')) ylabel(`ylabelmin'[.02]`ylabelmax') graphregion(fcolor(white)) bgcolor(white)
	
	// save graph
	if "`savegraph'" ~= "" {
		graph export "`savegraph'.`graphtype'", replace
	}
	
	restore
end

// This is just a wrapper on areg with weights to allow it to be bootstrapped with and without weights.

cap program drop areg_wrapper
program areg_wrapper

* Parse weights, if any
	_parsewt "fweight aweight pweight" `0' 
	local 0  "`s(newcmd)'" /* command minus weight statement */
	local wt "`s(weight)'"  /* contains [weight=exp] or nothing */
	
	syntax varlist [if] [, absorb(varlist) wtvar(varlist)]
	
	marksample touse

	if "`wtvar'" == "" {
		local wt = ""
	}
	else {
		local wt = "[aw = `wtvar']"
	}
	
	if "`absorb'" ~= "" {
		areg `varlist' `wt' if `touse', absorb(`absorb')
	}
	else {
		reg `varlist' `wt' if `touse'
	}
end

/* 8) semipar_iv_visual_topo
Topographical - overlay lines from minweeks weeks to maxweeks weeeks split by byweeks weeks
 Syntax:
    semipar visual y x z
    accepts if and weights
    absorb(varlist) - creates categorical variables by groups of unqiue values of every combination of varlist and includes as fixed effects
	minfecell(integer) - drop fixed effect cells with less than this many people in them to prevent imprecisely estimated fixed effects from biasing things
    controls(varlist) - adds varlist as control variables
	fs_order(interger) - order of pollynomial for first stage and control functions 
	winsorfs: Winsorize the first stage plot.  Does not do anything here becasue first stage plot is not displayed, but kept for consistent syntax.
    title, xtitle, ytitle - for labeling graphs
    savegraph, graphtype - string for graph saving.  graphtype is the extension.  so savegraph(name) graphtype(wmf)
    savedata - saves the data from the binscatter
	pctiletodrop(real) - drops the percentile on either end based on x variable after residualizing
	addymean - add in y mean
	addxmean - add in x mean
	log_y - y axis in logs (assuming that it is usually in levels) AFTER estimation
	level_x - x axis in levels (assuming that it is usually in logs) AFTER estimation
	poly_order - order of polynomial fit
	b_by(varlist) - Runs the controls by groups defined by the varlist. If blank, the controls are used as is.
*/    

cap program drop semipar_iv_visual_topo
program semipar_iv_visual_topo
* Parse weights, if any
	_parsewt "fweight aweight pweight" `0' 
	local 0  "`s(newcmd)'" /* command minus weight statement */
	local wt "`s(weight)'"  /* contains [weight=exp] or nothing */
	
	syntax varlist(min=3 max=3) [if] [, absorb(varlist numeric) minfecell(integer 0) controls(varlist numeric) winsorfs poly_order(integer 3) fs_order(integer 3) title(string) xtitle(string) ytitle(string) savegraph(string) savedata(string) graphtype(string) pctiletodrop(real 0) addxmean addymean log_y level_x minweeks(real 8) maxweeks(real 16) byweeks(real 2) b_by(varlist numeric)]
	
	* Mark sample (reflects the if conditions, and includes only nonmissing observations)
	marksample touse

	* Parse varlist
	local y : word 1 of `varlist'
	local x : word 2 of `varlist'
	local z : word 3 of `varlist'
	
	
	* Parse absorb to define the type of regression to be used
	if `"`absorb'"'!="" {
		di "absorb: `absorb'"
		cap drop toabsorb
		local regtype `"areg"'
		qui egen toabsorb = group(`absorb')
		local absorb `"absorb(toabsorb)"'
	}
	else {
		local regtype `"reg"'
	}
	
	* Get Weightvar
	local wtvar: word 2 of `wt'
	local wtvar = subinstr("`wtvar'","]","",1)
	
	* For speed preserve and keep only what I need
	preserve
	
	* Alternate controls if b_by is nonempty
	if "`b_by'" ~= "" {
		egen b_by_group = group(`b_by')
		qui levelsof b_by_group, local(levels)
		local controls2 = ""
		foreach var in `controls' {
			foreach level in `levels' {
				qui gen `var'_`level' = `var' if b_by_group == `level'
				qui replace `var'_`level' = 0 if b_by_group ~= `level'
				qui replace `var'_`level' = . if `var' == .
				local controls2 = "`controls2' `var'_`level'"
			}
		}
		di "`controls2'"
		local controls = "`controls2'"
		drop b_by_group
	}
	
	* Keep only what is necessary
		if "`regtype'" == "areg" {
			qui keep `x' `y' `z' `controls' `wtvar' `touse' toabsorb matched weeks_on_market1
		}
		else {
			qui keep `x' `y' `z' `controls' `wtvar' `touse' toabsorb matched weeks_on_market1
		}
		
		qui keep if `touse'
		
		foreach var of varlist `controls' {
			drop if `var' == .
		}
		
		count if `touse'
		// drop fixed effects cells with less observations than minfecell
		qui egen countfecell = count(toabsorb), by(toabsorb)
		drop if countfecell < `minfecell'

		* First stage
		local z_list = ""
		if `fs_order' > 1 {
			forvalues i = 2/`fs_order' {
				qui gen z_`i' = `z'^`i'
				local z_list = "`z_list' z_`i'"
			}
			`regtype' `x' `z' z_* `controls' `wt' if `touse', `absorb'
			test `z' `z_list'
			partpred fz, for(`z' `z_list')
			qui sum fz `wt', meanonly
			replace fz = fz-`r(mean)'
		}
		else {
			qui `regtype' `x' `z' `controls' `wt' if `touse', `absorb'
			test `z'
			partpred fz, for(`z')
			qui sum fz `wt', meanonly
			qui replace fz = fz-`r(mean)'
		}
	qui{
		tempvar x_sample y_sample tot_sample
		gen byte `x_sample' = e(sample)
		if "`addxmean'" ~= "" {
			qui summarize `x' `wt' if `touse', meanonly
			qui replace fz=fz+`r(mean)'
		}
		
		if `pctiletodrop' ~= 0 & "`winsorfs'" ~= "" {
			local toppctiletodrop = 100 - `pctiletodrop'
			_pctile `x', percentiles (`pctiletodrop' `toppctiletodrop')
			gen towins = `x' < `r(r1)' | `x' > `r(r2)'
			local winsorfs = "& towins == 0"
		}	

		// Note First stage binscatters are commented out here. For a first_stage binscatter use version without polynomial.
		if "`savegraph'" ~= "" {
			*binscatter `x' `z' `wt' if `touse' `winsorfs', nbins(25) title(First Stage) xtitle(Instrument) ytitle("Log Initial List Price Relative to Mean") controls(`controls') absorb(toabsorb) linetype(none) savegraph("`savegraph'_firststage.`graphtype'")
		}
		else {
			*binscatter `x' `z' `wt' if `touse' `winsorfs', nbins(25) title(First Stage) xtitle(Instrument) ytitle("Log Initial List Price Relative to Mean") controls(`controls') absorb(toabsorb) linetype(none)
		}
		cap drop winsorfs
		
	
		* Drop percentiles from either end of x variable before making figure if included in command
		if `pctiletodrop' ~= 0 {
			local toppctiletodrop = 100 - `pctiletodrop'
			_pctile fz `wt', percentiles (`pctiletodrop' `toppctiletodrop')
			drop if fz < `r(r1)' | fz > `r(r2)'
		}
		
		count if `x_sample' == 1
		
		* Drop entirely unique observations
		egen count_toabsorb = count(toabsorb), by(toabsorb)
		drop if count_toabsorb == 1
		
		count if `x_sample' == 1
		
	}
		
	qui gen byte `tot_sample' = `x_sample'
	count if `tot_sample' == 1	
	
	// Create polynomial
	if `poly_order' > 1 {
		forvalues i = 2/`poly_order' {
				qui gen fz_`i' = fz^`i'
		}
	}

	drop sell_within*
	
	local lcolor = ""
	
	// Loop through and determine whether sold withing X weeks, and then run second stage regression on indicator for this.
	forvalues ii = `minweeks'(`byweeks')`maxweeks' {
		gen sell_within`ii' = weeks_on_market1 <= `ii' & matched ~= 2
	
		`regtype' sell_within`ii' fz* `wt', `absorb'
		predict splinepredicted`ii'
		
		local lcolor = "`lcolor' " + "gs10"
		
	}
	gen xspline = fz
	
	sort xspline
	
	// Create figure of all predicted lines and the x's for the spline.
	// The "if mod(_n,1000)" is so that we do not use all the x points and create a file that is too large with too many data points instead use every 1000th data point.
	line splinepredicted* xspline if mod(_n,1000) == 0, lcolor(`lcolor') legend(off) title("`title'") xti("`xtitle'") yti("`ytitle'")  graphregion(fcolor(white))

	if "`savegraph'" ~= "" {
		graph export "`savegraph'.`graphtype'", replace
	}
	
	restore
end

/* 9) semipar_iv_table_poly
 Syntax:
    semipar visual y x z
    accepts if and weights
    absorb(varlist) - creates categorical variables by groups of unqiue values of every combination of varlist and includes as fixed effects
	minfecell(integer) - drop fixed effect cells with less than this many people in them to prevent imprecisely estimated fixed effects from biasing things
    controls(varlist) - adds varlist as control variables
	controlsfv(varlsit numeric) - ONE variable that can be introduced as a factor variable in controls
	fs_order(interger) - order of pollynomial for first stage and control functions 
	pctiletodrop(real) - drops the percentile on either end based on x variable after residualizing
	addymean - add in y mean
	addxmean - add in x mean
	level_x - x axis in levels (assuming that it is usually in logs) AFTER estimation
	poly_order - order of polynomial fit
	b_by(varlist) - Runs the controls by groups defined by the varlist. If blank, the controls are used as is.
*/    

cap program drop semipar_iv_table_poly
program semipar_iv_table_poly
	
	* Parse weights, if any
	_parsewt "fweight aweight pweight" `0' 
	local 0  "`s(newcmd)'" /* command minus weight statement */
	local wt "`s(weight)'"  /* contains [weight=exp] or nothing */
	
	syntax varlist(min=3 max=3) [if] [, absorb(varlist) minfecell(integer 0) controls(varlist numeric) controlsfv(varlist numeric) fs_order(integer 2) pctiletodrop(real 0) addxmean addymean level_x b_by(varlist numeric) poly_order(integer 2)]

	qui{
		* Mark sample (reflects the if conditions, and includes only nonmissing observations)
		marksample touse

		* Parse varlist
		local y : word 1 of `varlist'
		local x : word 2 of `varlist'
		local z : word 3 of `varlist'
		
		
		* Parse absorb to define the type of regression to be used
		if `"`absorb'"'!="" {
			cap drop toabsorb
			local regtype `"areg"'
			egen toabsorb = group(`absorb')
			local absorb `"absorb(toabsorb)"'
		}
		else {
			local regtype `"reg"'
		}
		
		* Get Weightvar
		local wtvar: word 2 of `wt'
		local wtvar = subinstr("`wtvar'","]","",1)
		
		* For speed preserve and keep only what I need
		preserve
		
		* Alternate controls if b_by is nonempty
		if "`b_by'" ~= "" {
			egen b_by_group = group(`b_by')
			qui levelsof b_by_group, local(levels)
			local controls2 = ""
			foreach var in `controls' {
				foreach level in `levels' {
					qui gen `var'_`level' = `var' if b_by_group == `level'
					qui replace `var'_`level' = 0 if b_by_group ~= `level'
					qui replace `var'_`level' = . if `var' == .
					local controls2 = "`controls2' `var'_`level'"
				}
			}
			di "`controls2'"
			local controls = "`controls2'"
			drop b_by_group
		}
	}
	
	* Keep only what is necessary
	if "`regtype'" == "areg" {
		qui keep `x' `y' `z' `controls' `controlsfv' `wtvar' `touse' toabsorb
	}
	else {
		qui keep `x' `y' `z' `controls' `controlsfv' `wtvar' `touse' toabsorb
	}
	
	foreach var of varlist `controls' `controlsfv' {
		drop if `var' == .
	}
	
	// drop fixed effects cells with less observations than minfecell
	qui egen countfecell = count(toabsorb), by(toabsorb)
	drop if countfecell < `minfecell'
	
	// Run semipar_iv_table_poly_toboot for estimation.
	semipar_iv_table_poly_toboot `y' `x' `z' `wt' if `touse', `absorb' controls(`controls') controlsfv(`controlsfv') fs_order(`fs_order') poly_order(`poly_order') pctiletodrop(`pctiletodrop') `addxmean' `addymean' `level_x'
	restore
end

/* 10) semipar_iv_table_poly_se
 Syntax:
    semipar visual y x z
    accepts if and weights
    absorb(varlist) - creates categorical variables by groups of unqiue values of every combination of varlist and includes as fixed effects
	minfecell(integer) - drop fixed effect cells with less than this many people in them to prevent imprecisely estimated fixed effects from biasing things
    controls(varlist) - adds varlist as control variables
	controlsfv(varlsit numeric) - ONE variable that can be introduced as a factor variable in controls
	fs_order(interger) - order of pollynomial for first stage
	bootstrapreps - number of bootstrap reps for calculating se
	pctiletodrop(real) - drops the percentile on either end based on x variable after residualizing
	addymean - add in y mean
	addxmean - add in x mean
	level_x - x axis in levels (assuming that it is usually in logs) AFTER estimation
	poly_order - order of polynomial fit
	b_by(varlist) - Runs the controls by groups defined by the varlist. If blank, the controls are used as is.
*/    

cap program drop semipar_iv_table_poly_se
program semipar_iv_table_poly_se
	
	* Parse weights, if any
	_parsewt "fweight aweight pweight" `0' 
	local 0  "`s(newcmd)'" /* command minus weight statement */
	local wt "`s(weight)'"  /* contains [weight=exp] or nothing */
	
	syntax varlist(min=3 max=3) [if] [, absorb(varlist) minfecell(integer 0) controls(varlist numeric) controlsfv(varlist numeric) fs_order(integer 2) poly_order(integer 2) bootstrapreps(integer 100) pctiletodrop(real 0) addymean addxmean level_x cluster(varlist) b_by(varlist numeric)]
	
	qui {
		* Mark sample (reflects the if conditions, and includes only nonmissing observations)
		marksample touse

		* Parse varlist
		local y : word 1 of `varlist'
		local x : word 2 of `varlist'
		local z : word 3 of `varlist'
		
		
		* Parse absorb to define the type of regression to be used
		if `"`absorb'"'!="" {
			cap drop toabsorb
			local regtype `"areg"'
			egen toabsorb = group(`absorb')
			local absorb `"absorb(toabsorb)"'
		}
		else {
			local regtype `"reg"'
		}
		
		* Get Weightvar
		local wtvar: word 2 of `wt'
		local wtvar = subinstr("`wtvar'","]","",1)
		
		* For speed preserve and keep only what I need
		preserve
	
		* Alternate controls if b_by is nonempty
		if "`b_by'" ~= "" {
			egen b_by_group = group(`b_by')
			qui levelsof b_by_group, local(levels)
			local controls2 = ""
			foreach var in `controls' {
				foreach level in `levels' {
					qui gen `var'_`level' = `var' if b_by_group == `level'
					qui replace `var'_`level' = 0 if b_by_group ~= `level'
					qui replace `var'_`level' = . if `var' == .
					local controls2 = "`controls2' `var'_`level'"
				}
			}
			di "`controls2'"
			local controls = "`controls2'"
			drop b_by_group
		}
	}
	* Keep only what is necessary
	
	if "`regtype'" == "areg" {
		qui keep `x' `y' `z' `controls' `controlsfv'  `wtvar' `touse' toabsorb `cluster'
	}
	else {
		qui keep `x' `y' `z' `controls' `controlsfv' `wtvar' `touse' toabsorb `cluster'
	}
	
	foreach var of varlist `controls' `controlsfv' {
		drop if `var' == .
	}
	
	// drop fixed effects cells with less observations than minfecell
	qui egen countfecell = count(toabsorb), by(toabsorb)
	drop if countfecell < `minfecell'
	
	* Cluster logic
	if "`cluster'" ~= "" {
		local cluster = "cluster(`cluster')"
	}
	
	* Now run bootstrapreps bootsraps of semipar_iv_table_poly_forbootstrap for the estimation
	* Note: The "right" way to do this is to use the clusterid and group options of
	* the bootstrap command, which create a new identifier for each separate cluster
	* for the estimation command. This makes it so that when one cluster is sampled multiple
	* times, each copy of the ecluster is treated as a separate version for fixed effects.
	* However, all I do with the zip5 variable within zip3 clusters is demean (zip5 x date FE), and this is the 
	* same transformation regardless of whether I treat all of the versions of a cluster as the same ZIP5 or not.
	* Thus this extra step is not necessary, and I can leave out the clusterid and group options.
	bootstrap , reps(`bootstrapreps') seed(1000) force `cluster': semipar_iv_table_poly_toboot `y' `x' `z' `wt' if `touse', `absorb' controls(`controls') controlsfv(`controlsfv') n(3) fs_order(`fs_order') poly_order(`poly_order') pctiletodrop(`pctiletodrop') `addxmean' `addymean' `level_x'
	estat bootstrap, percentile
	restore
end

/* 11) semipar_iv_table_poly_toboot
 Syntax:
    semipar visual y x z
    accepts if and weights
    absorb(varlist) - creates categorical variables by groups of unqiue values of every combination of varlist and includes as fixed effects
    controls(varlist) - adds varlist as control variables
    controlsfv(varlsit numeric) - ONE variable that can be introduced as a factor variable in controls
	n(interger) - number bins to do linear regression on
	fs_order(interger) - order of pollynomial for first stage and control functions 
	drop1pctile - drops 1 percentile on either end based on x variable after residualizing
	addymean - add in y mean
	addxmean - add in x mean
	level_x - x axis in levels (assuming that it is usually in logs) AFTER estimation
	poly_order - order of polynomial fit
*/    

cap program drop semipar_iv_table_poly_toboot
program semipar_iv_table_poly_toboot, eclass
	
	* Parse weights, if any
	_parsewt "fweight aweight pweight" `0' 
	local 0  "`s(newcmd)'" /* command minus weight statement */
	local wt "`s(weight)'"  /* contains [weight=exp] or nothing */
	
	syntax varlist(min=3 max=3) [if] [, absorb(varlist) controls(varlist numeric) controlsfv(varlist numeric) n(integer 10) fs_order(integer 2) poly_order(integer 3) pctiletodrop(real 0) addxmean addymean level_x]
	
	* Mark sample (reflects the if conditions, and includes only nonmissing observations)
	marksample touse
	keep if `touse'

	* Parse varlist
	local y : word 1 of `varlist'
	local x : word 2 of `varlist'
	local z : word 3 of `varlist'
		
	* Parse absorb to define the type of regression to be used
	if `"`absorb'"'!="" {
		local regtype `"areg"'
		local absorb `"absorb(toabsorb)"'
	}
	else {
		local regtype `"reg"'
	}
	
	* add controlsfv to controls
	if "`controlsfv'" ~= "" {
		tokenize "`controlsfv'"
		while "`1'" != "" {
			local controls = "`controls' i.`1'" 
			macro shift
		}
	}
	di "Controls: `controls'"

		* First stage
		local z_list = ""
		if `fs_order' > 1 {
			forvalues i = 2/`fs_order' {
				qui gen z_`i' = `z'^`i'
				local z_list = "`z_list' z_`i'"
			}
			`regtype' `x' `z' z_* `controls' `wt' if `touse', `absorb'
			test `z' `z_list'
			partpred fz, for(`z' `z_list')
			qui sum fz `wt', meanonly
			replace fz = fz-`r(mean)'
		}
		else {
			qui `regtype' `x' `z' `controls' `wt' if `touse', `absorb'
			test `z'
			partpred fz, for(`z')
			qui sum fz `wt', meanonly
			qui replace fz = fz-`r(mean)'
		}
		
		tempvar x_sample y_sample tot_sample
		gen byte `x_sample' = e(sample)
		if "`addxmean'" ~= "" {
			qui summarize `x' `wt' if `touse', meanonly
			qui replace fz=fz+`r(mean)'
		}

	count if `x_sample'
	
	* Drop percentiles from either end of x variable before making figure if included in command
	if `pctiletodrop' ~= 0 {
		local toppctiletodrop = 100 - `pctiletodrop'
		_pctile fz `wt', percentiles (`pctiletodrop' `toppctiletodrop')
		gen pctiletodrop =  fz < `r(r1)' | fz > `r(r2)'
		tab pctiletodrop
	}
	else {
		gen pctiletodrop = 0
	}
	
	count if `x_sample' & pctiletodrop == 0
	
	* Drop entirely unique observations
		egen count_toabsorb = count(toabsorb) if pctiletodrop == 0, by(toabsorb)
		replace pctiletodrop = 1 if count_toabsorb == 1
		drop count_toabsorb
	
	count if `x_sample' & pctiletodrop == 0
	
	* Transform x into levels if instructed
	if "`level_x'" ~= "" {
		replace fz = exp(fz)
	}
	
	* Create polynomial
	if `poly_order' > 1 {
		forvalues i = 2/`poly_order' {
				qui gen fz_`i' = fz^`i'
		}
	}
	
	// run the regression
	`regtype' `y' fz* `wt' if pctiletodrop == 0, `absorb'
	tempname bb
	matrix `bb' = e(b)
	
	// create thigns to return.
	mark tot_sample if `x_sample'
	ereturn post `bb', esample(tot_sample)
	ereturn local cmd ="bootstrap"
	drop fz* pctiletodrop
	cap drop z_*
	// For info on how to do a bootsrap and why this is right see:
	// http://www.stata.com/support/faqs/statistics/bootstrapping-vectors/
end

/* 12) semipar_iv_visual_geo
Polynomial fit by geograph overlaid
 Syntax:
    semipar visual y x z
    accepts if and weights
    absorb(varlist) - creates categorical variables by groups of unqiue values of every combination of varlist and includes as fixed effects
	minfecell(integer) - drop fixed effect cells with less than this many people in them to prevent imprecisely estimated fixed effects from biasing things
    controls(varlist) - adds varlist as control variables
	winsorfs: Winsorize the first stage plot. Does not do anything here becasue first stage plot is not displayed, but kept for consistent syntax.
    nbins(interger) - number of bins
	fs_order(interger) - order of pollynomial for first stage and control functions 
    title, xtitle, ytitle - for labeling graphs
    savegraph, graphtype - string for graph saving.  graphtype is the extension.  so savegraph(name) graphtype(wmf)
    savedata - saves the data from the binscatter
	pctiletodrop(real) - drops the percentile on either end based on x variable after residualizing
	addymean - add in y mean
	addxmean - add in x mean
	log_y - y axis in logs (assuming that it is usually in levels) AFTER estimation
	level_x - x axis in levels (assuming that it is usually in logs) AFTER estimation
	poly_order - order of polynomial fit
	b_by(varlist) - Runs the controls by groups defined by the varlist. If blank, the controls are used as is.
	geo(varlist numeric) Geography variable
	min_geo - minimum number of observations in a geography for it to be used.
*/    

cap program drop semipar_iv_visual_geo
program semipar_iv_visual_geo
* Parse weights, if any
	_parsewt "fweight aweight pweight" `0' 
	local 0  "`s(newcmd)'" /* command minus weight statement */
	local wt "`s(weight)'"  /* contains [weight=exp] or nothing */
	
	syntax varlist(min=3 max=3) [if] [, absorb(varlist numeric) minfecell(integer 0) controls(varlist numeric) nbins(integer 20) winsorfs poly_order(integer 3) fs_order(integer 3) title(string) xtitle(string) ytitle(string) savegraph(string) savedata(string) graphtype(string) pctiletodrop(real 0) addxmean addymean log_y level_x b_by(varlist numeric) geo(varlist numeric) min_geo(integer 500)]
	
	* Mark sample (reflects the if conditions, and includes only nonmissing observations)
	marksample touse

	* Parse varlist
	local y : word 1 of `varlist'
	local x : word 2 of `varlist'
	local z : word 3 of `varlist'
	
	
	* Parse absorb to define the type of regression to be used
	if `"`absorb'"'!="" {
		di "absorb: `absorb'"
		cap drop toabsorb
		local regtype `"areg"'
		qui egen toabsorb = group(`absorb')
		local absorb `"absorb(toabsorb)"'
	}
	else {
		local regtype `"reg"'
	}
	
	* Get Weightvar
	local wtvar: word 2 of `wt'
	local wtvar = subinstr("`wtvar'","]","",1)
	
	* For speed preserve and keep only what I need
	preserve
	
	* Alternate controls if b_by is nonempty
	if "`b_by'" ~= "" {
		egen b_by_group = group(`b_by')
		qui levelsof b_by_group, local(levels)
		local controls2 = ""
		foreach var in `controls' {
			foreach level in `levels' {
				qui gen `var'_`level' = `var' if b_by_group == `level'
				qui replace `var'_`level' = 0 if b_by_group ~= `level'
				qui replace `var'_`level' = . if `var' == .
				local controls2 = "`controls2' `var'_`level'"
			}
		}
		di "`controls2'"
		local controls = "`controls2'"
		drop b_by_group
	}
	
	* Keep only what is necessary
		if "`regtype'" == "areg" {
			qui keep `x' `y' `z' `controls' `wtvar' `touse' toabsorb `geo'
		}
		else {
			qui keep `x' `y' `z' `controls' `wtvar' `touse' toabsorb `geo'
		}
		
		qui keep if `touse'
		
		foreach var of varlist `controls' {
			drop if `var' == .
		}
		
		count if `touse'
		// drop fixed effects cells with less observations than minfecell
		qui egen countfecell = count(toabsorb), by(toabsorb)
		drop if countfecell < `minfecell'

		* First stage: Here it is pooled. Only the second stage is by geography.
		local z_list = ""
		if `fs_order' > 1 {
			forvalues i = 2/`fs_order' {
				qui gen z_`i' = `z'^`i'
				local z_list = "`z_list' z_`i'"
			}
			`regtype' `x' `z' z_* `controls' `wt' if `touse', `absorb'
			test `z' `z_list'
			partpred fz, for(`z' `z_list')
			qui sum fz `wt', meanonly
			replace fz = fz-`r(mean)'
		}
		else {
			qui `regtype' `x' `z' `controls' `wt' if `touse', `absorb'
			test `z'
			partpred fz, for(`z')
			qui sum fz `wt', meanonly
			qui replace fz = fz-`r(mean)'
		}
	qui{
		tempvar x_sample y_sample tot_sample
		gen byte `x_sample' = e(sample)
		if "`addxmean'" ~= "" {
			qui summarize `x' `wt' if `touse', meanonly
			qui replace fz=fz+`r(mean)'
		}
		
		if `pctiletodrop' ~= 0 & "`winsorfs'" ~= "" {
			local toppctiletodrop = 100 - `pctiletodrop'
			_pctile `x', percentiles (`pctiletodrop' `toppctiletodrop')
			gen towins = `x' < `r(r1)' | `x' > `r(r2)'
			local winsorfs = "& towins == 0"
		}	
		
		// Note First stage binscatters are commented out here. For a first_stage binscatter use version without polynomial.
		
		if "`savegraph'" ~= "" {
			*binscatter `x' `z' `wt' if `touse' `winsorfs', nbins(25) title(First Stage) xtitle(Instrument) ytitle("Log Initial List Price Relative to Mean") controls(`controls') absorb(toabsorb) linetype(none) savegraph("`savegraph'_firststage.`graphtype'")
		}
		else {
			*binscatter `x' `z' `wt' if `touse' `winsorfs', nbins(25) title(First Stage) xtitle(Instrument) ytitle("Log Initial List Price Relative to Mean") controls(`controls') absorb(toabsorb) linetype(none)
		}
		cap drop winsorfs
		
	
		* Drop percentiles from either end of x variable before making figure if included in command
		if `pctiletodrop' ~= 0 {
			local toppctiletodrop = 100 - `pctiletodrop'
			_pctile fz `wt', percentiles (`pctiletodrop' `toppctiletodrop')
			drop if fz < `r(r1)' | fz > `r(r2)'
		}
		
		count if `x_sample' == 1
		
		* Drop entirely unique observations
		egen count_toabsorb = count(toabsorb), by(toabsorb)
		drop if count_toabsorb == 1
		
		count if `x_sample' == 1
		
	}
	
	qui gen byte `tot_sample' = `y_sample' & `x_sample'
	count if `tot_sample' == 1	
	
	// create polynomial
	if `poly_order' > 1 {
		forvalues i = 2/`poly_order' {
				qui gen fz_`i' = fz^`i'
		}
	}
	
	// Get all of the values of "geo", which is the geography variable
	levelsof `geo', local(geos)
	gen splinepredicted = .
	local line_code = ""
	gen xspline = fz
	di "min_geo = `min_geo'"
	foreach g in `geos' { // Loop through the geographies
		count if `geo' == `g'
		if `r(N)' >= `min_geo' { // if you have enough observations (more than min_geo) run the regression, create a prediction
			`regtype' `y' fz* `wt' if `geo' == `g', `absorb' 
			predict temp
			replace splinepredicted = temp if `geo' == `g'
			drop temp
			local line_code = "`line_code'" + " (line splinepredicted xspline if `geo' == `g')" // the prediction gets added to the next line
			qui sum xspline if `geo' == `g', d
			drop if (xspline < `r(p1)' | xspline > `r(p99)') & `geo' == `g' // drop the extreme edges by geography
		}
	}
	sort `geo' xspline // sort things
	by `geo': gen modulo_n = _n 
	keep if mod(modulo_n,10) == 1 // Using every 10th observation reduces file size
	
	// Run the line code
	twoway `line_code', legend(off) title("`title'") xti("`xtitle'") yti("`ytitle'") graphregion(fcolor(white)) bgcolor(white)

	if "`savegraph'" ~= "" {
		graph export "`savegraph'.`graphtype'", replace
	}
	
	restore
end
